import pandas as pd
import numpy as np
import datetime as dt
import seaborn
import matplotlib.pyplot as plt
import plotly.graph_objects as go
#Open a dataBase
pizzaria_df = pd.read_csv(r"D:\Projetos bancos de dados\Pizzaria\Datafiniti_Pizza_Restaurants_and_the_Pizza_They_Sell_May19.csv")
#cleaning unnecessary data
pizzaria_df = pizzaria_df.drop(['menuPageURL','postalCode','keys','dateAdded','dateUpdated','menus.dateSeen'],axis=1)
display(pizzaria_df)
| id | address | categories | primaryCategories | city | country | latitude | longitude | menus.amountMax | menus.amountMin | menus.currency | menus.description | menus.name | name | priceRangeCurrency | priceRangeMin | priceRangeMax | province | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AVz3Y-7h3D1zeR_xDAqm | 4203 E Kiehl Ave | Pizza,Restaurant,American restaurants,Pizza Pl... | Accommodation & Food Services | Sherwood | US | 34.832300 | -92.183800 | 7.98 | 7.98 | USD | NaN | Cheese Pizza | Shotgun Dans Pizza | USD | 0 | 25 | AR |
| 1 | AVweGPFF_7pvs4fzAAzQ | 25 E Camelback Rd | Pizza,Pizza Place,Restaurants | Accommodation & Food Services | Phoenix | US | 33.509266 | -112.073044 | 6.00 | 6.00 | USD | NaN | Pizza Cookie | Sauce Pizza Wine | USD | 0 | 25 | AZ |
| 2 | AVwdRGa9_7pvs4fz4E3K | 3703 Paxton Ave | Restaurant,Pizza Place,Restaurants | Accommodation & Food Services | Cincinnati | US | 39.144883 | -84.432685 | 6.49 | 6.49 | USD | a saucelessampcomma double cheese pizza with a... | Pizza Blanca | Mios Pizzeria | USD | 0 | 25 | OH |
| 3 | AVwdX4psIN2L1WUfvJB1 | 30495 John R Rd | Pizza,Carry-out food,Pizza Place,Restaurants | Accommodation & Food Services | Madison Heights | US | 42.516669 | -83.106630 | 5.99 | 5.99 | USD | NaN | Small Pizza | Hungry Howies Pizza | USD | 25 | 40 | MI |
| 4 | AVwdaeTtkufWRAb55pSH | 3600 Eastern Ave | Pizza,American restaurants,Pizza Place,Pizza e... | Accommodation & Food Services | Baltimore | US | 39.286630 | -76.566984 | 5.49 | 5.49 | USD | NaN | Pizza Sub | Spartan Pizzeria | USD | 0 | 25 | MD |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9995 | AVwdKXFYByjofQCxmIyZ | 1126 E Lovejoy St | Pizza Place | Accommodation & Food Services | Buffalo | US | 42.889759 | -78.806747 | 37.10 | 7.10 | USD | NaN | Super Steak Pizza | Carbone's Pizzeria | USD | 0 | 25 | NY |
| 9996 | AVwdKXFYByjofQCxmIyZ | 1126 E Lovejoy St | Pizza Place | Accommodation & Food Services | Buffalo | US | 42.889759 | -78.806747 | 10.10 | 10.10 | USD | NaN | Large Cheese & 1 Item Pizza | Carbone's Pizzeria | USD | 0 | 25 | NY |
| 9997 | AVz6kzsQFcQ3k02bDhwQ | 3641 E Main St | Pizza Place,Restaurants | Accommodation & Food Services | Saint Charles | US | 41.921300 | -88.275300 | 8.45 | 8.45 | USD | NaN | Individual 6" Pepperoni Stuffed Deep Dish Pizza | Giordano's Pizza | USD | 0 | 25 | IL |
| 9998 | AVz6kzsQFcQ3k02bDhwQ | 3641 E Main St | Pizza Place,Restaurants | Accommodation & Food Services | Saint Charles | US | 41.921300 | -88.275300 | 8.45 | 8.45 | USD | NaN | Individual 6" Cheese Stuffed Deep Dish Pizza | Giordano's Pizza | USD | 0 | 25 | IL |
| 9999 | AVwdG9zTkufWRAb52Y2A | 7460 W Lake Mead Blvd | Pizza Place,Restaurant,Fast Food,Pizza | Accommodation & Food Services | Las Vegas | US | 36.196180 | -115.256240 | 8.79 | 8.79 | USD | NaN | King Arthur's Supreme Pizza | Round Table Pizza | USD | 0 | 25 | NV |
10000 rows × 18 columns
#Question 1:
'''
What are the categories of resturants?
'''
categories = pizzaria_df['primaryCategories'].value_counts().reset_index().rename(columns ={'index' :'Primary Categories','primaryCategories':'Values'},\
index={0:1,1:2,2:3,3:4,4:5,5:6,6:7,7:8})
display(categories)
'''
Analysis:
you can see that there are several pizzerias that have different categories, such as
they serve pizzas and function as restaurants
'''
| Primary Categories | Values | |
|---|---|---|
| 1 | Accommodation & Food Services | 9909 |
| 2 | Management of Companies & Enterprises | 43 |
| 3 | Accommodation & Food Services,Management of Co... | 23 |
| 4 | Retail,Accommodation & Food Services | 11 |
| 5 | Retail | 9 |
| 6 | Wholesale Trade,Accommodation & Food Services | 2 |
| 7 | Educational Services,Accommodation & Food Serv... | 2 |
| 8 | Wholesale Trade,Accommodation & Food Services,... | 1 |
'\nAnalysis:\nyou can see that there are several pizzerias that have different categories, such as\nthey serve pizzas and function as restaurants \n'
#Question 2:
'''
What are the 10 cities with the most pizzerias by province?
'''
cities_for_provinces = pizzaria_df[['city','province']].value_counts().reset_index().rename(columns={0:'restaurants by city'},\
index={0:1,1:2,2:3,3:4,4:5,5:6,7:8,9:10})
cities_for_provinces_plot = pizzaria_df[['city']].value_counts().head(10).reset_index().rename(columns={0:'restaurants by city'},\
index={0:1,1:2,2:3,3:4,4:5,5:6,7:8,9:10}).plot('city','restaurants by city',kind='bar')
display(cities_for_provinces.head(10))
print('==='*40)
| city | province | restaurants by city | |
|---|---|---|---|
| 1 | New York | NY | 655 |
| 2 | Brooklyn | NY | 460 |
| 3 | Los Angeles | CA | 193 |
| 4 | Buffalo | NY | 178 |
| 5 | Philadelphia | PA | 140 |
| 6 | Seattle | WA | 135 |
| 6 | Chicago | IL | 133 |
| 8 | Flushing | NY | 124 |
| 8 | San Francisco | CA | 110 |
| 10 | Bronx | NY | 88 |
========================================================================================================================
#Question 3:
'''
top 10 frequented provinces
'''
provinces = pizzaria_df['province'].value_counts().reset_index().rename(columns={'index':'provinces','province':'frequency '},\
index={0:1,1:2,2:3,3:4,4:5,5:6,7:8,9:10})
plot_df= pizzaria_df['province'].value_counts().head(10).reset_index().rename(columns={'index':'provinces','province':'frequency'},\
index={0:1,1:2,2:3,3:4,4:5,5:6,7:8,9:10}).plot('provinces','frequency',kind ='barh')
display(provinces.head(10))
print('==='*40)
| provinces | frequency | |
|---|---|---|
| 1 | NY | 2640 |
| 2 | CA | 1386 |
| 3 | PA | 814 |
| 4 | FL | 601 |
| 5 | TX | 485 |
| 6 | IL | 381 |
| 6 | MI | 366 |
| 8 | OH | 324 |
| 8 | AZ | 288 |
| 10 | WA | 262 |
========================================================================================================================
#Question 4:
'''
'''
print('total franchises in the data set: {}'.format(len(np.unique(pizzaria_df['id']))))
restaurant_list = pizzaria_df['name'].value_counts().reset_index().rename(columns ={'index':'Name','name':'number of branches'},\
index={0:1,1:2,2:3,3:4,4:5,5:6,7:8,9:10})
pizzaria_df['name'].value_counts().head(10).reset_index().rename(columns ={'index':'Name','name':'Values'}).plot('Name','Values',kind ='barh')
display(restaurant_list.head(10))
print('==='*40)
total franchises in the data set: 2285
| Name | number of branches | |
|---|---|---|
| 1 | California Pizza Kitchen | 261 |
| 2 | Papa Murphy's | 208 |
| 3 | Papa John's Pizza | 124 |
| 4 | Papa Murphys | 69 |
| 5 | Gino's Pizzeria | 68 |
| 6 | Pirates Pizza | 55 |
| 6 | Olympia Pizza and Spaghetti House | 54 |
| 8 | Round Table Pizza | 54 |
| 8 | Domino's Pizza | 54 |
| 10 | Pizza Hut | 51 |
========================================================================================================================
#Question 5:
'''
Top 10 Pizza flavors
'''
#Removed the 'pizza' line as it is generalized
menu_pizza_df = pizzaria_df['menus.name'].value_counts().reset_index()
menu_pizza_df = menu_pizza_df.drop(menu_pizza_df.index[[4]])
menu_pizza_df = menu_pizza_df.rename(columns ={'index':'Name'},index={0:1,1:2,2:3,3:4})
display(menu_pizza_df.head(10))
print('==='*40)
#I plotted by stages to remove the 'pizza' line
menu_pizza_grapic_df = pizzaria_df['menus.name'].value_counts().head(10).reset_index().rename(columns ={'index':'Name'})
menu_pizza_grapic_df = menu_pizza_grapic_df.drop(menu_pizza_grapic_df.index[[4]])
menu_pizza_grapic_df.plot('Name','menus.name',kind='bar')
| Name | menus.name | |
|---|---|---|
| 1 | Cheese Pizza | 299 |
| 2 | White Pizza | 225 |
| 3 | Hawaiian Pizza | 149 |
| 4 | Buffalo Chicken Pizza | 112 |
| 5 | Margherita Pizza | 104 |
| 6 | Veggie Pizza | 95 |
| 7 | Bbq Chicken Pizza | 81 |
| 8 | Greek Pizza | 72 |
| 9 | Pizza Sub | 71 |
| 10 | Pizza Burger | 70 |
========================================================================================================================
<AxesSubplot:xlabel='Name'>
#Question 6:
'''
where can i find cheese pizza:
'''
chesse_pizza_df = pizzaria_df[pizzaria_df['menus.name'] == 'Cheese Pizza']
chesse_pizza_df.drop_duplicates('id',inplace = True)
chesse_pizza_df.shape
fig = go.Figure(data=go.Scattergeo(
lon = chesse_pizza_df['longitude'],
lat = chesse_pizza_df['latitude'],
text = chesse_pizza_df['name'] + ' :-' + chesse_pizza_df['province'],
mode = 'markers',
marker_color = 'red',
))
fig.update_layout(
title = 'Cheese Pizza',
geo_scope='usa',
)
fig.show()
C:\Users\Pichau\anaconda3\lib\site-packages\pandas\util\_decorators.py:311: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return func(*args, **kwargs)
#Question 7:
'''
where ca